{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.5/dist-packages/sklearn/cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n",
      "  \"This module will be removed in 0.20.\", DeprecationWarning)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<script>requirejs.config({paths: { 'plotly': ['https://cdn.plot.ly/plotly-latest.min']},});if(!window.Plotly) {{require(['plotly'],function(plotly) {window.Plotly=plotly;});}}</script>"
      ],
      "text/vnd.plotly.v1+html": [
       "<script>requirejs.config({paths: { 'plotly': ['https://cdn.plot.ly/plotly-latest.min']},});if(!window.Plotly) {{require(['plotly'],function(plotly) {window.Plotly=plotly;});}}</script>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import re\n",
    "import sklearn\n",
    "import xgboost as xgb\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "\n",
    "import plotly.offline as py\n",
    "py.init_notebook_mode(connected=True)\n",
    "import plotly.graph_objs as go\n",
    "import plotly.tools as tls\n",
    "\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')\n",
    "\n",
    "from sklearn.svm import SVC\n",
    "from sklearn.model_selection import KFold\n",
    "from sklearn.metrics import accuracy_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>PassengerId</th>\n",
       "      <th>Survived</th>\n",
       "      <th>Pclass</th>\n",
       "      <th>Name</th>\n",
       "      <th>Sex</th>\n",
       "      <th>Age</th>\n",
       "      <th>SibSp</th>\n",
       "      <th>Parch</th>\n",
       "      <th>Ticket</th>\n",
       "      <th>Fare</th>\n",
       "      <th>Cabin</th>\n",
       "      <th>Embarked</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>Braund, Mr. Owen Harris</td>\n",
       "      <td>male</td>\n",
       "      <td>22.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>A/5 21171</td>\n",
       "      <td>7.2500</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n",
       "      <td>female</td>\n",
       "      <td>38.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>PC 17599</td>\n",
       "      <td>71.2833</td>\n",
       "      <td>C85</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>Heikkinen, Miss. Laina</td>\n",
       "      <td>female</td>\n",
       "      <td>26.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>STON/O2. 3101282</td>\n",
       "      <td>7.9250</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   PassengerId  Survived  Pclass  \\\n",
       "0            1         0       3   \n",
       "1            2         1       1   \n",
       "2            3         1       3   \n",
       "\n",
       "                                                Name     Sex   Age  SibSp  \\\n",
       "0                            Braund, Mr. Owen Harris    male  22.0      1   \n",
       "1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   \n",
       "2                             Heikkinen, Miss. Laina  female  26.0      0   \n",
       "\n",
       "   Parch            Ticket     Fare Cabin Embarked  \n",
       "0      0         A/5 21171   7.2500   NaN        S  \n",
       "1      0          PC 17599  71.2833   C85        C  \n",
       "2      0  STON/O2. 3101282   7.9250   NaN        S  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# load in train and test datasets\n",
    "train = pd.read_csv(\"./data/train.csv\")\n",
    "test = pd.read_csv(\"./data/test.csv\")\n",
    "\n",
    "#Store our passenger ID for easy access\n",
    "PassengerId = test['PassengerId']\n",
    "\n",
    "train.head(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "full_data = [train, test]\n",
    "\n",
    "train['Name_length'] = train['Name'].apply(len)\n",
    "test['Name_length'] = test['Name'].apply(len)\n",
    "\n",
    "train['Has_Cabin'] = train['Cabin'].apply(lambda x:0 if type(x) == float else 1)\n",
    "test['Has_Cabin'] = test['Cabin'].apply(lambda x:0 if type(x) == float else 1)\n",
    "\n",
    "#Feature engineering\n",
    "#Create new feature FamilySize as a combination of sibsp and Parch\n",
    "for dataset in full_data:\n",
    "    dataset['FamilySize'] = dataset['SibSp'] + dataset['Parch'] + 1\n",
    "# Create new feature IsAlone from FamilySize\n",
    "# for dataset in full_data:\n",
    "#     dataset['IsAlone'] = 0\n",
    "#     dataset.loc[dataset['FamilySize'] == 1, 'IsAlone' ] = 1\n",
    "\n",
    "for dataset in full_data:\n",
    "    dataset['Embarked'] = dataset['Embarked'].fillna('S')\n",
    "for dataset in full_data:\n",
    "    dataset['Fare']= dataset['Fare'].fillna(train['Fare'].median())\n",
    "train['CategoricalFare'] = pd.qcut(train['Fare'],4)\n",
    "\n",
    "for dataset in full_data:\n",
    "    age_avg = dataset['Age'].mean()\n",
    "    age_std = dataset['Age'].std()\n",
    "    age_null_count = dataset['Age'].isnull().sum()\n",
    "    age_null_random_list = np.random.randint(age_avg - age_std, age_avg + age_std, size=age_null_count)\n",
    "    dataset['Age'][np.isnan(dataset['Age'])] = age_null_random_list\n",
    "    dataset['Age'] = dataset['Age'].astype(int)\n",
    "# train['CategoricalAge'] = pd.cut(train['Age'], 5)\n",
    "# # Define the function to extract titles from passengers' names \n",
    "def get_title(name):\n",
    "    title_search = re.search(' ([A-Za-z]+)\\.', name)\n",
    "    if title_search:\n",
    "        return title_search.group(1)\n",
    "    return \"\"\n",
    "# Create a new feature Title, containing the titles of passenger names\n",
    "for dataset in full_data:\n",
    "    dataset['Title'] = dataset['Name'].apply(get_title)\n",
    "    \n",
    "# Group all non-common titles into one single grouping \"Rare\"\n",
    "for dataset in full_data:\n",
    "    dataset['Title'] = dataset['Title'].replace(['Lady','Countess','Capt','Col','Don',\n",
    "                                                 'Dr','Major','Rev','Sir','Jonkheer','Dona'],'Rare')\n",
    "    dataset['Title'] = dataset['Title'].replace('Mlle', 'Miss')\n",
    "    dataset['Title'] = dataset['Title'].replace('Ms', 'Miss')\n",
    "    dataset['Title'] = dataset['Title'].replace('Mme', 'Mrs')\n",
    "\n",
    "for dataset in full_data:\n",
    "    dataset['Sex'] = dataset['Sex'].map( {'female':0, 'male':1}).astype(int)\n",
    "    title_mapping = {\"Mr\":1 ,\"Miss\":2, \"Mrs\":3 ,\"Master\": 4 ,\"Rare\":5}\n",
    "    dataset['Title'] = dataset['Title'].map(title_mapping)\n",
    "    dataset['Title'] = dataset['Title'].fillna(0)\n",
    "    \n",
    "    dataset['Embarked'] = dataset['Embarked'].map({'S':0, 'C':1 ,'Q':2}).astype(int)\n",
    "    dataset.loc[ dataset['Fare'] <= 7.91, 'Fare']  = 0\n",
    "    dataset.loc[ (dataset['Fare'] > 7.91) & (dataset['Fare'] <= 14.454), 'Fare'] = 1\n",
    "    dataset.loc[ (dataset['Fare'] > 14.454) & (dataset['Fare'] <= 31), 'Fare'] = 2\n",
    "    dataset.loc[ (dataset['Fare'] > 31), 'Fare'] = 3\n",
    "    dataset['Fare'] = dataset['Fare'].astype(int)\n",
    "    \n",
    "    dataset.loc[ dataset['Age'] <= 16, 'Age' ] = 0\n",
    "    dataset.loc[(dataset['Age'] > 16) & (dataset['Age'] <= 32), 'Age'] = 1\n",
    "    dataset.loc[(dataset['Age'] > 32) & (dataset['Age'] <= 48), 'Age'] = 2\n",
    "    dataset.loc[(dataset['Age'] > 48) & (dataset['Age'] <= 64), 'Age'] = 3\n",
    "    dataset.loc[ dataset['Age'] > 64, 'Age'] = 4 ;\n",
    "    \n",
    "# Feature selection\n",
    "drop_elements = ['PassengerId', 'Name', 'Ticket', 'Cabin', 'SibSp', 'Name_length']\n",
    "train = train.drop(drop_elements, axis = 1)\n",
    "train = train.drop(['CategoricalFare'], axis = 1)\n",
    "test  = test.drop(drop_elements, axis = 1)\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Survived</th>\n",
       "      <th>Pclass</th>\n",
       "      <th>Sex</th>\n",
       "      <th>Age</th>\n",
       "      <th>Parch</th>\n",
       "      <th>Fare</th>\n",
       "      <th>Embarked</th>\n",
       "      <th>Has_Cabin</th>\n",
       "      <th>FamilySize</th>\n",
       "      <th>Title</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Survived  Pclass  Sex  Age  Parch  Fare  Embarked  Has_Cabin  FamilySize  \\\n",
       "0         0       3    1    1      0     0         0          0           2   \n",
       "1         1       1    0    2      0     3         1          1           2   \n",
       "2         1       3    0    1      0     1         0          0           1   \n",
       "3         1       1    0    2      0     3         0          1           2   \n",
       "4         0       3    1    2      0     1         0          0           1   \n",
       "\n",
       "   Title  \n",
       "0      1  \n",
       "1      3  \n",
       "2      2  \n",
       "3      3  \n",
       "4      1  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "class SklearnHelper(object):\n",
    "    def __init__(self, clf, seed=0, params=None):\n",
    "        params['random_state'] = seed\n",
    "        self.clf = clf(**params)\n",
    "        \n",
    "    def train(self, x_train, y_train):\n",
    "        self.clf.fit(x_train, y_train)\n",
    "    \n",
    "    def predict(self, x):\n",
    "        return self.clf.predict(x)\n",
    "    \n",
    "    def fit(self, x, y):\n",
    "        return self.clf.fit(x,y)\n",
    "    \n",
    "    def feature_importances(self, x, y):\n",
    "        print(self.clf.fit(x,y).feature_importances_)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "ntrain = train.shape[0]\n",
    "ntest = test.shape[0]\n",
    "SEED = 0\n",
    "NFOLDS = 5\n",
    "kf = KFold(n_splits=NFOLDS, shuffle=False ,random_state=SEED) #K-交叉验证\n",
    "\n",
    "#kf = RepeatedKFold(n_splits=NFOLDS, n_repeats =2 ,random_state=SEED) #K-交叉重复验证\n",
    "def get_oof(clf, x_train, y_train, x_test):   #x_train 训练集 y_train 训练集标签 x_test测试集\n",
    "    oof_train = np.zeros((ntrain,))\n",
    "    oof_test = np.zeros((ntest,))\n",
    "    oof_test_skf = np.empty((NFOLDS, ntest))\n",
    "    \n",
    "    for i,(train_index, test_index) in enumerate(kf.split(x_train)): #train_index 被分到训练集中的index  同理test_index\n",
    "        x_tr = x_train[train_index]\n",
    "        y_tr = y_train[train_index]\n",
    "        x_te = x_train[test_index]\n",
    "        y_te = y_train[test_index]\n",
    "        clf.train(x_tr, y_tr)\n",
    "        \n",
    "        oof_train[test_index] = clf.predict(x_te) # 用验证数据集验证训练集训练好的模型\n",
    "        oof_test_skf[i, :] = clf.predict(x_test)  # 用测试集测试\n",
    "        print(\"SVC K-fold: %d accuracy score is %f \" % (i,accuracy_score(y_te.reshape(-1),clf.predict(x_te).reshape(-1))))\n",
    "    oof_test[:] = oof_test_skf.mean(axis=0)\n",
    "    return oof_train.reshape(-1, 1), oof_test.reshape(-1, 1)   "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create Numpy arrays of train, test and target ( Survived) dataframes to feed into our models\n",
    "y_train = train['Survived'].ravel()\n",
    "train = train.drop(['Survived'], axis=1)\n",
    "x_train = train.values # Creates an array of the train data\n",
    "x_test = test.values # Creats an array of the test data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Support Vector Classifier parameters \n",
    "svc_params = {\n",
    "    'kernel' : 'linear',\n",
    "    'C' : 0.0453\n",
    "    }"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [],
   "source": [
    "svc = SklearnHelper(clf=SVC, seed=SEED, params=svc_params)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "SVC K-fold: 0 accuracy score is 0.821229 \n",
      "SVC K-fold: 1 accuracy score is 0.808989 \n",
      "SVC K-fold: 2 accuracy score is 0.814607 \n",
      "SVC K-fold: 3 accuracy score is 0.775281 \n",
      "SVC K-fold: 4 accuracy score is 0.837079 \n",
      "Training is complete\n"
     ]
    }
   ],
   "source": [
    "svc_oof_train, svc_oof_test = get_oof(svc,x_train, y_train, x_test) # Support Vector Classifier\n",
    "\n",
    "print(\"Training is complete\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.811447811448\n"
     ]
    }
   ],
   "source": [
    "print(accuracy_score(y_train,svc_oof_train.reshape(-1)))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [
    {
     "ename": "ValueError",
     "evalue": "cannot insert Predict, already exists",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-67-5eed6d5eb282>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mtrain\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minsert\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'Predict'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0msvc_oof_train\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mint8\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      6\u001b[0m \u001b[0mPredict_Error\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Survived'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mtrain\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Predict'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      7\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mPredict_Error\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Pclass'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mPredict_Error\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Pclass'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m==\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcount\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36minsert\u001b[0;34m(self, loc, column, value, allow_duplicates)\u001b[0m\n\u001b[1;32m   2611\u001b[0m         \u001b[0mvalue\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sanitize_column\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcolumn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbroadcast\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2612\u001b[0m         self._data.insert(loc, column, value,\n\u001b[0;32m-> 2613\u001b[0;31m                           allow_duplicates=allow_duplicates)\n\u001b[0m\u001b[1;32m   2614\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2615\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0massign\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/pandas/core/internals.py\u001b[0m in \u001b[0;36minsert\u001b[0;34m(self, loc, item, value, allow_duplicates)\u001b[0m\n\u001b[1;32m   4056\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mallow_duplicates\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mitem\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   4057\u001b[0m             \u001b[0;31m# Should this be a different kind of error??\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 4058\u001b[0;31m             \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'cannot insert {}, already exists'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   4059\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   4060\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mValueError\u001b[0m: cannot insert Predict, already exists"
     ]
    }
   ],
   "source": [
    "# 分析分类错误数据\n",
    "#print(svc_oof_train)\n",
    "\n",
    "\n",
    "train.insert(1,'Predict',svc_oof_train.astype(np.int8))\n",
    "Predict_Error=train[train['Survived'] != train['Predict']]\n",
    "print(Predict_Error['Pclass'][Predict_Error['Pclass']==1].count())\n",
    "print(Predict_Error['Pclass'][Predict_Error['Pclass']==2].count())\n",
    "print(Predict_Error['Pclass'][Predict_Error['Pclass']==3].count())\n",
    "Predict_Error.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [],
   "source": [
    "#直接某个基学习器\n",
    "#print(rf_oof_test.round())\n",
    "StackingSubmission = pd.DataFrame({ 'PassengerId': PassengerId,\n",
    "                            'Survived': svc_oof_test.round().reshape(-1).astype(np.int8)})\n",
    "StackingSubmission.to_csv(\"SVCSubmission.csv\", index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
